




/*

Merges main data to make the single dataset to be used in all estimations 

*/


	
* Loads the GAEZ grid-point data, adds for every crop the share of years
* 1986-2000 in which the crop's yearly series equals zero (<crop>_null),
* keeps the variables used downstream and saves the result as g30.dta.
use "public_data/gaez_africa.dta", clear

	/*
	Table B1 (this file is also used to compute the number of observations with statewise dominance)
	preserve
	keep if point_x>34.9
	keep if point_x<35
	keep if point_y<-1.8
	keep if point_y>-1.9
	keep point* *_2000 *_19*
	keep point* phaseolus* rapeseed* cassava* banana* wpotato* sorgum*
	reshape long  phaseolus rapeseed cassava banana wpotato sorgum,i(point_x point_y) j(year)
	sort phaseolus sorgum wpotato banana rapeseed
	restore
	*/


* One pass per crop: every variable whose name contains 1986 is taken as the
* first year of a yearly series named <stub><year>.
* A tempvar holds the running count so that no data variable whose name starts
* with "inter" can be overwritten or dropped by accident, and the number of
* years is a plain counter instead of a constant stored in every observation.
foreach xcrop of varlist *1986 {
	local stub = subinstr("`xcrop'", "1986", "", .)

	tempvar nzero
	qui gen `nzero' = 0
	local nyears = 0
	forvalues xyear = 1986/2000 {
		local ycrop = subinstr("`xcrop'", "1986", "`xyear'", .)
		* a missing yearly value is not counted as a zero
		qui replace `nzero' = `nzero' + 1 if `ycrop' == 0
		local ++nyears
	}

	* share of the years in which the series is exactly zero
	qui gen `stub'_null = `nzero' / `nyears'
	drop `nzero'
}

keep country adist border_id point* *_caparhigh *_prdnt *_areat  ncropswall careawall *_null *_price*

sort point_x point_y

save "public_data/g30.dta", replace

	

* Ethnic homeland
* Attaches the ethnic homeland name to every grid point of g30.dta and keeps
* only the points found in both files.
use "public_data/distance_murdock_all.dta", clear
keep ethnic_name point_x point_y

* Lower-case, trimmed names; uninhabited or unnamed areas are discarded
replace ethnic_name = trim(lower(ethnic_name))
drop if strpos(ethnic_name, "uninhabited") > 0 | ethnic_name == ""

* keep(match) retains the points present in both files; no _merge is created
merge 1:1 point_x point_y using "public_data/g30.dta", keep(match) nogenerate

sort country
qui save "public_data/g30.dta", replace


* World Bank Rule of Law
* Adds the 1996 value of rle to every grid point, matching on country name.
use "private_data/wgidataset.dta", clear
rename countryname country
keep if year == 1996 & rle != .
keep country rle

* Harmonises country names (external do-file, run on the two kept variables)
qui do "programs/edcc_rename_countries.do"

* Countries without any grid point are discarded; grid points without a
* matching country are kept.
* NOTE(review): those unmatched grid points carry a missing rle from here on.
qui merge 1:m country using "public_data/g30.dta", keep(match using) nogenerate
sort point_x point_y
	




/*
	Defines the main variables and restricts the sample to ethnic homelands with observations on both sides of a border and with positive max(potential yield)
*/



*Keep ethnic homelands that cross a border, and well-defined Rule of Law indices
* borderethnic : one id per (border_id, ethnic_name) pair
* ncountry     : one id per country
egen borderethnic = group(border_id ethnic_name)
egen ncountry = group(country)

* Lowest and highest country id observed on each border segment
tempvar cmin cmax
bysort border_id: egen `cmin' = min(ncountry)
bysort border_id: egen `cmax' = max(ncountry)

* A border segment with a single country on it is not a border crossing
drop if `cmin' == `cmax'

* Border id that encodes the pair of countries: 1000 * lowest id + highest id
gen border = `cmin'*1000 + `cmax'
drop `cmin' `cmax'

* Fixed-effect group: ethnic homeland x border; points without a group are removed
gen groupfe = borderethnic
drop if groupfe == .
sort point_x point_y


* Defines varofi, the main explanatory variable of interest
* varofi is 1 for the points whose rle equals the highest rle found in their
* ethnic homeland x border group, and 0 for all other points.
* NOTE(review): a point with missing rle gets 0 when its group has some
* non-missing rle, and 1 when rle is missing in the whole group - confirm that
* this is the intended treatment of countries without an index.
tempvar rle_top
bysort groupfe: egen `rle_top' = max(rle)
gen varofi = (rle == `rle_top')
drop `rle_top'
label var varofi "Rule of Law"



*Keeps observations close to the border
* Retained window: adist strictly above 10 and at most 100.
* Points with missing adist fail the upper bound and are removed.
keep if adist > 10 & adist <= 100


*Removes ethnic areas with no plot with positive productivity (e.g. desert) within 100 km of the border
* ppwall = number of *_caparhigh variables that are strictly positive and
* non-missing at the point. It stays in the dataset.
gen ppwall = 0
foreach yieldvar of varlist *_caparhigh {
	qui replace ppwall = ppwall + 1 if `yieldvar' > 0 & `yieldvar' < .
}

* Highest ppwall on each country side of a group, then the lower of the sides:
* a group is removed as soon as one side has no point with a positive count.
tempvar side_best group_low
bysort country groupfe: egen `side_best' = max(ppwall)
bysort groupfe: egen `group_low' = min(`side_best')
drop if `group_low' == 0
drop `side_best' `group_low'



*Drops if no difference in varofi for an ethnic group x border
* Within each group the points are ordered by varofi, so the first and the
* last point hold the lowest and the highest value; groups where the two
* coincide have no variation and are removed.
bysort groupfe (varofi): keep if varofi[1] != varofi[_N]



*Defines polynomial of distance to the border
* Signed distance: +adist where varofi is 1, -adist where varofi is 0.
* The "in" terms are non-zero only where varofi is 1, the "out" terms only
* where varofi is 0.
tempvar sdist
gen `sdist' = cond(varofi == 0, -adist, adist)

gen rdist1in  = varofi * `sdist'
gen rdist1out = (1 - varofi) * `sdist'
forvalues p = 2/3 {
	gen rdist`p'in  = varofi * `sdist'^`p'
	gen rdist`p'out = (1 - varofi) * `sdist'^`p'
}
drop `sdist'


* Defines identifier
* One id per grid point
egen identifier = group(point_x point_y)

* Defines weight variable
* Created empty here; nothing in this file fills it
gen weight = .




*Computes crops that would be most profitable to grow according to (observed) 2000 prices in US and France
*
* For every point, the crop with the highest price x *_caparhigh product is
* found separately for the two price series (*_priceusa and *_priceother).
* grownusa / grownother are then set to 1 when that crop has a positive
* *_areat at the point.
*
* Changes with respect to the previous version of this section:
*  - a missing *_caparhigh made the product missing, and a missing value
*    compares as larger than any number: the crop won and set the running
*    maximum to missing, so no later crop could replace it. Both factors
*    must now be non-missing;
*  - a missing *_areat passed the >0 test and flagged the crop as grown; it is
*    now required to be non-missing;
*  - the by-group area share (border_areat) was computed for every crop and
*    dropped without being used; it is no longer computed;
*  - highusa1 and highother1 are referred to by their full names, so the
*    section also runs with variable abbreviation switched off.

* Crop groups of the *_areat variables whose yield and prices are stored
* under the name of another crop
local src_cassavayam      cassava
local src_cocoacoffeetea  cocoa
local src_wspotato        spotato
local src_bancocon        coconut
local src_vegetables      tomato
local src_pulses          phaseolus
local src_cerealoth       barley

gen highusa1    = " "
gen interusa1   = 0
gen highother1  = " "
gen interother1 = 0
foreach crop_areat of varlist *_areat {
	local crop = subinstr("`crop_areat'", "_areat", "", .)

	* name under which the yield and price variables of this crop are stored
	local src `src_`crop''
	if "`src'" == "" {
		local src `crop'
	}
	local yield   `src'_caparhigh
	local pusa    `src'_priceusa
	local pother  `src'_priceother

	* The running maximum starts at 0 and the comparison is >=, so a crop
	* with a zero product qualifies and ties go to the later crop.
	local ifusa   `pusa'*`yield' >= interusa1 & `pusa' < . & `yield' < .
	qui replace highusa1    = "`crop'"          if `ifusa'
	qui replace interusa1   = `pusa'*`yield'    if `ifusa'

	local ifother `pother'*`yield' >= interother1 & `pother' < . & `yield' < .
	qui replace highother1  = "`crop'"          if `ifother'
	qui replace interother1 = `pother'*`yield'  if `ifother'
}


* Flags the points where the most profitable crop has a positive area
gen grownusa	= 0
gen grownother	= 0
foreach crop_areat of varlist *_areat {
	local crop = subinstr("`crop_areat'", "_areat", "", .)
	qui replace grownusa   = 1 if highusa1   == "`crop'" & `crop_areat' > 0 & `crop_areat' < .
	qui replace grownother = 1 if highother1 == "`crop'" & `crop_areat' > 0 & `crop_areat' < .
}

drop highusa1 highother1 inter*

sort point_x point_y
save "public_data/g30.dta", replace








* Merge with information on population and distance to city (for robustness checks - data for observations of last file only)
qui use "public_data/urbanizationbypoint.dta", clear

* Both covariates enter in logs; pop00 is shifted by one before the log.
* NOTE(review): distcity is not shifted, so a distcity of 0 becomes missing.
replace pop00    = ln(pop00 + 1)
replace distcity = ln(distcity)

* Every point of g30.dta is kept; points found only in the urbanization file
* are discarded.
merge 1:1 point_x point_y using "public_data/g30.dta", keep(match using) nogenerate

sort point_x point_y
save "public_data/g30.dta", replace



